# Scrapy settings for example project
#
# For simplicity, this file contains only the most important settings by
# default. All the other settings are documented here:
#
#     http://doc.scrapy.org/topics/settings.html
#
SPIDER_MODULES = ['example.spiders']
NEWSPIDER_MODULE = 'example.spiders'

USER_AGENT = 'scrapy-redis (+https://github.com/rolando/scrapy-redis)'

#指定一个去重方法给request对象去重
DUPEFILTER_CLASS = "scrapy_redis.dupefilter.RFPDupeFilter"
#调度器对应类——指定schedule队列
SCHEDULER = "scrapy_redis.scheduler.Scheduler"
#调度器内容持久化——队列中的内容是否持久保存，为False的时候会在关闭redis的时候清空redis
SCHEDULER_PERSIST = True
#SCHEDULER_QUEUE_CLASS = "scrapy_redis.queue.SpiderPriorityQueue"
#SCHEDULER_QUEUE_CLASS = "scrapy_redis.queue.SpiderQueue"
#SCHEDULER_QUEUE_CLASS = "scrapy_redis.queue.SpiderStack"

ITEM_PIPELINES = {
    'example.pipelines.ExamplePipeline': 300,
    'scrapy_redis.pipelines.RedisPipeline': 400,    #scrapy_redis实现的item保存到redis的pipeline
}

LOG_LEVEL = 'DEBUG'

# Introduce an artifical delay to make use of parallelism. to speed up the
# crawl.
DOWNLOAD_DELAY = 1

REDIS_URL = "redis://127.0.0.1:6379"    #指定redis的地址

# REDIS_URL = "redis://192.168.207.130.6379"    #redis的地址可以携程如下形式
# REDIS_HOST = "192.168.207.134"
# REDIS_PORT = 6379